Set up
Packages
require(tidyr)
require(dplyr)
require(magrittr)
require(ggplot2)
require(bayesplot)
require(ape)
require(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
require(brms)
require(ggmcmc)
require(knitr)
require(phytools)
require(cowplot)
require(RColorBrewer)
require(ggtree)
require(car)
require(xtable)
require(phytools)
# Functions
nrmse <- function(obs, pred, type = "sd") {
squared_sums <- sum((obs - pred)^2)
mse <- squared_sums/length(obs)
rmse <- sqrt(mse)
if (type == "sd") nrmse <- rmse/sd(obs)
if (type == "mean") nrmse <- rmse/mean(obs)
if (type == "maxmin") nrmse <- rmse/ (max(obs) - min(obs))
if (type == "iq") nrmse <- rmse/ (quantile(obs, 0.75) - quantile(obs, 0.25))
if (!type %in% c("mean", "sd", "maxmin", "iq")) message("Wrong type!")
nrmse <- round(nrmse, 3)
return(nrmse)
}
rmse <- function(obs, pred){
sqrt(mean((obs - pred)^2))
}
inv_logit <- function(x) {exp(x)/(1+exp(x))}
Loading data
data <- read.csv("./sp.threat.nat.csv", as.is=F)
nrow(data)# 395
range(data$vetted)# 0-3346
nrow(data[data$vetted>0,])# 317
### Magellon tree
tree <- read.tree("./Ramirez_etal_pruned.tre")
length(tree$tip.label)# 442
# These families we have data for, but are not in tree
setdiff(data$family, tree$tip.label)
# Getting family age
min_above_zero <- function(x){
if (length(x)>1) {
min(x[x>0])
} else {0}
}
# Calculate family age
coph <- cophenetic(tree)
fam_age <- sapply(rownames(coph), function(fam)
min_above_zero(coph[rownames(coph)%in%fam,])
)
ages <- data.frame(family=names(fam_age), age=as.numeric(fam_age)/2)
ages %<>% arrange(family)
data <- left_join(data,ages)
# Calculating simple diversification metric - log(N+1)/age
data$div_simple <- with(data, log(species)/age)
## Calculating Method of Moments diversification
# https://bio.libretexts.org/Bookshelves/Evolutionary_Developmental_Biology/Book%3A_Phylogenetic_Comparative_Methods_(Harmon)/11%3A_Fitting_Birth-Death_Models/11.02%3A_Clade_Age_and_Diversity
#extract family stem ages
ages<-NULL
for (x in 1: length(tree$tip.label)){
sp<-tree$tip.label[x]
edge<-tree$edge.length[tree$edge[,2]==x]
ages<-rbind(ages,(data.frame(sp,edge)))
}
data.rates <- merge(data, ages,by.x="family", by.y = "sp")
#eugh my numerics edge lengths have been converted to factors
data.rates$edge<-as.numeric(as.character(data.rates$edge))
#sanity check
# head(ages[order(ages$sp),])
# head(data.rates)
ages[ages$edge==max(ages$edge),]
#Calculate rates assuming different extinction fractions (e)
e<-0
r0<-log(data.rates$species*(1-e)+e)/data.rates$edge
e<-0.5
r0.5<-log(data.rates$species*(1-e)+e)/data.rates$edge
e<-0.9
r0.9<-log(data.rates$species*(1-e)+e)/data.rates$edge
#just for a quick look-see
# plot(r0, r0.9)
data<-cbind(data.rates, r0, r0.5, r0.9)
pairs(data[,c("r0","r0.5","r0.9")])

unique(data[data$div_simple==max(data$div_simple),])
data$div_mom <- data$r0.9
with(data, plot(div_simple, div_mom))

with(data, cor(div_simple, div_mom))#0.974
# Setting diversification as div_simple
data$diversification <- data$div_simple
# creating another family level variable (for non-phylogenetic family level effects)
data$family_name <- data$family
# These families we have data for, but are not in tree
setdiff(data$family, tree$tip.label)
# Identify families in tree with no data
setdiff(tree$tip.label, data$family)
# remove any missing families in data
data <- data[data$family%in%tree$tip.label,]
# remove any families in tree with no data
tree <- drop.tip(tree, setdiff(tree$tip.label, as.character(data$family)))
# phylogenetic correlation structure
phy_cov <- vcv(tree, corr=TRUE)
nrow(data)#395
nrow(data[data$vetted>0,])#317
# instances where threatened is greater than vetted
# nrow(data[data$threatened>data$vetted,])#0
# instances where threatened is greater than species
# nrow(data[data$threatened>data$species,])#0
Assessing multicollinearity
vif(lm(threatened ~ naturalized_scaled + diversification_scaled + age_scaled + range.size, data=data_vetted))
## naturalized_scaled diversification_scaled age_scaled
## 3.529148 3.496445 1.674901
## range.size
## 3.734682
vif(lm(threatened ~ diversification_scaled + age_scaled + hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size, data=data_vetted))
## diversification_scaled age_scaled hybrids
## 3.315630 1.687157 1.137351
## climate.sum animal asexual
## 1.726952 1.108766 1.268796
## annual fleshy.fruit herbaceous
## 1.753944 1.150634 1.393140
## range.size
## 3.417065
vif(lm(naturalized ~ threatened_scaled + age_scaled + hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size, data=data_vetted))
## threatened_scaled age_scaled hybrids climate.sum
## 1.935678 1.020602 1.147107 1.697694
## animal asexual annual fleshy.fruit
## 1.104978 1.302405 1.756528 1.154597
## herbaceous range.size
## 1.423269 2.988637
# with horticultural species
vif(lm(threatened ~ diversification_scaled + age_scaled + hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size + hort.species, data=data_vetted))
## diversification_scaled age_scaled hybrids
## 3.854150 1.741455 1.142688
## climate.sum animal asexual
## 1.789898 1.130522 1.334724
## annual fleshy.fruit herbaceous
## 1.822324 1.152992 1.393760
## range.size hort.species
## 4.172669 4.766442
vif(lm(naturalized ~ threatened_scaled + age_scaled + hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size + hort.species, data=data_vetted))
## threatened_scaled age_scaled hybrids climate.sum
## 2.330926 1.033480 1.157313 1.794180
## animal asexual annual fleshy.fruit
## 1.125600 1.351916 1.846114 1.155710
## herbaceous range.size hort.species
## 1.438503 3.870661 4.937729
with(data_vetted, cor(diversification_scaled, range.size))
## [1] 0.6882094
# full dataset
vif(lm(threatened ~ diversification_scaled + age_scaled + hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size + hort.species, data=data))
## diversification_scaled age_scaled hybrids
## 3.693999 1.509428 1.134472
## climate.sum animal asexual
## 1.742312 1.102910 1.362397
## annual fleshy.fruit herbaceous
## 1.734637 1.116877 1.276750
## range.size hort.species
## 4.503627 4.717285
vif(lm(naturalized ~ diversification_scaled + age_scaled + hybrids + climate.sum + animal + asexual + annual + fleshy.fruit + herbaceous + range.size + hort.species, data=data))
## diversification_scaled age_scaled hybrids
## 3.693999 1.509428 1.134472
## climate.sum animal asexual
## 1.742312 1.102910 1.362397
## annual fleshy.fruit herbaceous
## 1.734637 1.116877 1.276750
## range.size hort.species
## 4.503627 4.717285
with(data, plot(diversification_scaled ~ range.size))

with(data, cor(diversification_scaled, range.size))#0.726
## [1] 0.7264469
 Â
Exploration of priors
Visualization of brms default priors (blue) compared to custom priors (yellow) inspired by McElreath’s book Statistical Rethinking.
Priors for the slope parameters:
prior <- runif(n=10000, min=-100, max=100) # brms default is actually -Inf to +Inf
p <- inv_logit(prior)
prior2 <- rnorm(n=10000, 0, 1.5) # McElreath
p2 <- inv_logit(prior2)
plot(density(p, adj=0.1), xlim=c(0,1), col="#56B4E9", main="", xlab=expression(paste("inv_logit (",beta,")")))
lines(density(p2, adj=0.1), col="#E69F00")

The default improper uniform prior used by brms is flat on the log-odd scale, so when converted to the probabilty scale, puts excess mass near 0 and 1. A Normal(0, 1.5) prior is more reasonable, having a flatter distribution with a mode at 0.5 on the probablity scale.
Priors for the family-level variance parameters:
prior <- rstudent_t(n=10000, df=3, mu=0, sigma=10) # brms default
prior <- prior[prior>0]
# p <- inv_logit(prior)
prior2 <- rnorm(n=10000, 0, 1) # McElreath
prior2 <- prior2[prior2>0]
# p2 <- inv_logit(prior2)
plot(density(prior2, adj=0.1), xlim=c(0,10), col="#E69F00", main="", xlab=expression(paste(sigma)))
lines(density(prior, adj=0.1), col="#56B4E9")

## png
## 2
The brms prior is extremely long tailed. Considering we are fitting a non-linear model in which changes in log-odds over over 4 have a diminishing influence due to the ceiling effect of the binomial distribution. With this in mind, a prior of Normal(0,1) constrains the posterior to a more realistic range, and is likely to improve sampling efficiency.
Threat models (with IUCN evaluated species)
Simple threat model - brms default priors
if (!file.exists("./vet_threatened_naturalized_10k.rds")) {
vet_threatened_nat <- brm(threatened | trials(vetted) ~ naturalized_scaled + diversification_scaled + age_scaled + range.size + (1|family) + (1|family_name),
data=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.95,max_treedepth=12), cores=4)
saveRDS(vet_threatened_nat, "./vet_threatened_naturalized_10k.rds")
} else { vet_threatened_nat <- readRDS("./vet_threatened_naturalized_10k.rds")}
# prior_summary(vet_threatened_nat, all=TRUE)
# Default if for b coefficients to have improper priors
outcome <- summary(vet_threatened_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Number of naturalized species","Diversification rate","Family age","Range size", "SD brownian effects","SD family specific effects")
vet_threatened_nat_plot <- stanplot(vet_threatened_nat, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nat_plot

pdf("./plots_tables/vet_threatened_nat_plot.pdf", width=5, height=4)
vet_threatened_nat_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Number of naturalized species |
-0.35 |
0.00 |
0.21 |
-0.76 |
-0.50 |
-0.36 |
-0.21 |
0.05 |
2691.46 |
1.00 |
| Diversification rate |
1.81 |
0.01 |
0.30 |
1.23 |
1.61 |
1.80 |
2.01 |
2.39 |
1517.16 |
1.00 |
| Family age |
1.03 |
0.00 |
0.23 |
0.57 |
0.88 |
1.03 |
1.18 |
1.47 |
3247.56 |
1.00 |
| Range size |
-0.70 |
0.00 |
0.23 |
-1.16 |
-0.86 |
-0.70 |
-0.54 |
-0.24 |
4107.40 |
1.00 |
| SD brownian effects |
0.61 |
0.01 |
0.24 |
0.08 |
0.46 |
0.63 |
0.78 |
1.01 |
292.19 |
1.02 |
| SD family specific effects |
0.59 |
0.01 |
0.18 |
0.14 |
0.49 |
0.62 |
0.72 |
0.87 |
296.54 |
1.02 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nat.tex")
##Posterior predictive checks
pp_vet_threatened_nat <- brms::posterior_predict(vet_threatened_nat)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened_nat$data$threatened, pp_vet_threatened_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet_threatened_nat <- apply(pp_vet_threatened_nat, 1, function(x) nrmse(pred=x,obs=vet_threatened_nat$data$threatened, type="iq"))
mean(nrmse_vet_threatened_nat)
## [1] 0.3117106
sd(nrmse_vet_threatened_nat)
## [1] 0.0357692
# RMSE
rmse_vet_threatened_nat <- apply(pp_vet_threatened_nat, 1, function(x) rmse(pred=x,obs=vet_threatened_nat$data$threatened))
mean(rmse_vet_threatened_nat)
## [1] 6.545878
sd(rmse_vet_threatened_nat)
## [1] 0.7511597
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_nat, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.5 0.27 0.01 0.98 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
Simple threat model - prior / posterior plots
# Plotting influence of prior on posterior for a beta parameter
# on the log-odds scale
tibble(x = seq(from = -10, to = 10, by = .01)) %>%
ggplot() +
# the prior
# improper uniform prior restricted to (-10,10) to aid visualization
geom_ribbon(aes(x = x, ymin = 0, ymax = dunif(x,-10,10)),
fill = "#A9A9A9", alpha = 1/2) +
# the posterior
geom_density(data = posterior_samples(vet_threatened_nat),
aes(x = b_age_scaled),
fill = "#56B4E9", size = 0, alpha = 1/2) +
scale_y_continuous() +
coord_cartesian(xlim = c(-5, 5)) +
labs(x="Beta Age (brms priors)", y="Density") +
theme_bw() -> prior_post_1_beta
prior_post_1_beta

# Plotting influence of prior on posterior for a sigma parameter
# SD Family Effect
tibble(x = seq(from = 0, to = 6, by = .01)) %>%
ggplot() +
# the prior
geom_ribbon(aes(x = x, ymin = 0, ymax = dstudent_t(x, df=3, mu=0, sigma=10)),
fill = "#A9A9A9", alpha = 1/2) +
# the posterior
geom_density(data = posterior_samples(vet_threatened_nat),
aes(x = sd_family_name__Intercept),
fill = "#56B4E9", size = 0, alpha = 1/2) +
scale_y_continuous() +
coord_cartesian(xlim = c(0, 5)) +
labs(x="SD Family Effects (brms priors)", y="Density") +
theme_bw() -> prior_post_1_sigma
prior_post_1_sigma

Simple threat model - custom priors (based on McElreath)
if (!file.exists("./vet_threatened_naturalized_p2_10k.rds")) {
vet_threatened_nat_p2 <- brm(threatened | trials(vetted) ~ naturalized_scaled + diversification_scaled + age_scaled + range.size + (1|family) + (1|family_name),
data=data_vetted, family=binomial(),
# custom priors based on McElreath
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1.5), class = b),
prior(normal(0, 1), class = sd)),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.95,max_treedepth=12), cores=4)
saveRDS(vet_threatened_nat_p2, "./vet_threatened_naturalized_p2_10k.rds")
} else { vet_threatened_nat_p2 <- readRDS("./vet_threatened_naturalized_p2_10k.rds")}
# prior_summary(vet_threatened_nat_p2, all=TRUE)
outcome <- summary(vet_threatened_nat_p2$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Number of naturalized species","Diversification rate","Family age","Range size", "SD brownian effects","SD family specific effects")
vet_threatened_nat_p2_plot <- stanplot(vet_threatened_nat_p2, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nat_p2_plot

pdf("./plots_tables/vet_threatened_nat_p2_plot.pdf", width=5, height=4)
vet_threatened_nat_p2_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Number of naturalized species |
-0.33 |
0.00 |
0.20 |
-0.73 |
-0.47 |
-0.33 |
-0.19 |
0.07 |
3778.16 |
1.00 |
| Diversification rate |
1.71 |
0.01 |
0.29 |
1.15 |
1.51 |
1.70 |
1.91 |
2.27 |
2032.84 |
1.00 |
| Family age |
0.95 |
0.00 |
0.23 |
0.51 |
0.80 |
0.95 |
1.11 |
1.40 |
3338.78 |
1.00 |
| Range size |
-0.66 |
0.00 |
0.23 |
-1.11 |
-0.82 |
-0.66 |
-0.50 |
-0.21 |
3707.12 |
1.00 |
| SD brownian effects |
0.63 |
0.01 |
0.24 |
0.10 |
0.49 |
0.66 |
0.81 |
1.02 |
518.90 |
1.01 |
| SD family specific effects |
0.57 |
0.01 |
0.19 |
0.10 |
0.46 |
0.60 |
0.70 |
0.85 |
494.49 |
1.01 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nat_p2.tex")
##Posterior predictive checks
pp_vet_threatened_nat_p2 <- brms::posterior_predict(vet_threatened_nat_p2)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened_nat_p2$data$threatened, pp_vet_threatened_nat_p2[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet_threatened_nat_p2 <- apply(pp_vet_threatened_nat_p2, 1, function(x) nrmse(pred=x,obs=vet_threatened_nat_p2$data$threatened, type="iq"))
mean(nrmse_vet_threatened_nat_p2)
## [1] 0.311648
sd(nrmse_vet_threatened_nat_p2)
## [1] 0.03609379
# RMSE
rmse_vet_threatened_nat_p2 <- apply(pp_vet_threatened_nat_p2, 1, function(x) rmse(pred=x,obs=vet_threatened_nat_p2$data$threatened))
mean(rmse_vet_threatened_nat_p2)
## [1] 6.544551
sd(rmse_vet_threatened_nat_p2)
## [1] 0.7578132
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_nat_p2, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.53 0.27 0.01 0.99 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# Plotting influence of prior on posterior for a beta parameter
# on the log-odds scale
tibble(x = seq(from = -10, to = 10, by = .01)) %>%
ggplot() +
# the prior
geom_ribbon(aes(x = x, ymin = 0, ymax = dnorm(x,0,1.5)),
fill = "#A9A9A9", alpha = 1/2) +
# the posterior
geom_density(data = posterior_samples(vet_threatened_nat_p2),
aes(x = b_age_scaled),
fill = "#E69F00", size = 0, alpha = 1/2) +
scale_y_continuous() +
coord_cartesian(xlim = c(-5, 5)) +
labs(x="Beta Age - custom priors", y="Density") +
theme_bw() -> prior_post_2_beta
prior_post_2_beta

# Plotting influence of prior on posterior for a sigma parameter
# SD Family Effect
tibble(x = seq(from = 0, to = 6, by = .01)) %>%
ggplot() +
# the prior
geom_ribbon(aes(x = x, ymin = 0, ymax = dnorm(x, 0,1)),
fill = "#A9A9A9", alpha = 1/2) +
# the posterior
geom_density(data = posterior_samples(vet_threatened_nat_p2),
aes(x = sd_family_name__Intercept),
fill = "#E69F00", size = 0, alpha = 1/2) +
scale_y_continuous() +
coord_cartesian(xlim = c(0, 5)) +
labs(x="SD Family Effects (brms priors)", y="Density") +
theme_bw() -> prior_post_2_sigma
prior_post_2_sigma

Full threat model (IUCN vetted species) - brms default priors
if (!file.exists("./vet_threatened_10k.rds")) {
vet_threatened <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name),
data=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.98,max_treedepth=13),cores=4)
# no divergent transitions
saveRDS(vet_threatened, "./vet_threatened_10k.rds")
} else { vet_threatened <- readRDS("./vet_threatened_10k.rds")}
# prior_summary(vet_threatened, all=FALSE)
outcome <- summary(vet_threatened$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")
vet_threatened_plot <- stanplot(vet_threatened, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_plot

pdf("./plots_tables/vet_threatened_plot.pdf", width=5, height=4)
vet_threatened_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
1.68 |
0.00 |
0.27 |
1.16 |
1.51 |
1.68 |
1.86 |
2.21 |
3760.75 |
1.00 |
| Famly age |
0.95 |
0.00 |
0.22 |
0.51 |
0.80 |
0.96 |
1.11 |
1.39 |
4251.86 |
1.00 |
| Range size |
-0.77 |
0.00 |
0.24 |
-1.24 |
-0.92 |
-0.77 |
-0.61 |
-0.30 |
4574.07 |
1.00 |
| Hybrids |
-0.22 |
0.00 |
0.12 |
-0.46 |
-0.30 |
-0.22 |
-0.14 |
0.01 |
5022.40 |
1.00 |
| Animal pollinated |
0.67 |
0.00 |
0.21 |
0.27 |
0.53 |
0.67 |
0.82 |
1.08 |
4168.52 |
1.00 |
| Fleshy fruit |
-0.10 |
0.00 |
0.20 |
-0.48 |
-0.24 |
-0.10 |
0.03 |
0.29 |
4642.28 |
1.00 |
| Climate sum |
-0.05 |
0.00 |
0.08 |
-0.21 |
-0.10 |
-0.05 |
0.01 |
0.12 |
4884.11 |
1.00 |
| Herbaceous |
-0.03 |
0.00 |
0.18 |
-0.37 |
-0.15 |
-0.03 |
0.09 |
0.32 |
4949.48 |
1.00 |
| Annual |
0.07 |
0.00 |
0.17 |
-0.26 |
-0.04 |
0.07 |
0.18 |
0.39 |
4593.64 |
1.00 |
| Axsexual |
-0.12 |
0.00 |
0.14 |
-0.39 |
-0.22 |
-0.12 |
-0.02 |
0.17 |
4539.44 |
1.00 |
| SD brownian effects |
0.51 |
0.01 |
0.24 |
0.05 |
0.33 |
0.52 |
0.70 |
0.96 |
562.12 |
1.01 |
| SD family specific effects |
0.63 |
0.01 |
0.16 |
0.22 |
0.54 |
0.65 |
0.74 |
0.85 |
628.17 |
1.01 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened.tex")
##Posterior predictive checks
# first make posterior predictions
pp_threatened <- brms::posterior_predict(vet_threatened)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=vet_threatened$data$threatened, type="iq"))
mean(nrmse_vet)
## [1] 0.311238
sd(nrmse_vet)
## [1] 0.035023
# RMSE
rmse_vet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=vet_threatened$data$threatened))
mean(rmse_vet)
## [1] 6.535986
sd(rmse_vet)
## [1] 0.7354639
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.4 0.27 0 0.95 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)
Full threat model (IUCN vetted species) - brms prior / posterior plots
# Plotting influence of prior on posterior for a beta parameter
# on the log-odds scale
tibble(x = seq(from = -10, to = 10, by = .01)) %>%
ggplot() +
# the prior
# improper uniform prior restricted to (-10,10) to aid visualization
geom_ribbon(aes(x = x, ymin = 0, ymax = dunif(x,-10,10)),
fill = "#A9A9A9", alpha = 1/2) +
# the posterior
geom_density(data = posterior_samples(vet_threatened),
aes(x = b_age_scaled),
fill = "#56B4E9", size = 0, alpha = 1/2) +
scale_y_continuous() +
coord_cartesian(xlim = c(-5, 5)) +
labs(x="Beta Age (brms priors)", y="Density") +
theme_bw() -> prior_post_3_beta
prior_post_3_beta

# Plotting influence of prior on posterior for a sigma parameter
# SD Family Effect
tibble(x = seq(from = 0, to = 6, by = .01)) %>%
ggplot() +
# the prior
geom_ribbon(aes(x = x, ymin = 0, ymax = dstudent_t(x, df=3, mu=0, sigma=10)),
fill = "#A9A9A9", alpha = 1/2) +
# the posterior
geom_density(data = posterior_samples(vet_threatened),
aes(x = sd_family_name__Intercept),
fill = "#56B4E9", size = 0, alpha = 1/2) +
scale_y_continuous() +
coord_cartesian(xlim = c(0, 5)) +
labs(x="SD Family Effects (brms priors)", y="Density") +
theme_bw() -> prior_post_3_sigma
prior_post_3_sigma

Full threat model (IUCN vetted species) - custom priors (based on McElreath)
if (!file.exists("./vet_threatened_p2_10k.rds")) {
vet_threatened_p2 <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name),
# custom priors based on McElreath
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1.5), class = b),
prior(normal(0, 1), class = sd)),
data=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.98,max_treedepth=13),cores=4)
# no divergent transitions
saveRDS(vet_threatened_p2, "./vet_threatened_p2_10k.rds")
} else { vet_threatened_p2 <- readRDS("./vet_threatened_p2_10k.rds")}
# summary(vet_threatened_p2)
# prior_summary(vet_threatened_p2, all=FALSE)
outcome <- summary(vet_threatened_p2$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")
vet_threatened_p2_plot <- stanplot(vet_threatened_p2, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_p2_plot

pdf("./plots_tables/vet_threatened_p2_plot.pdf", width=5, height=4)
vet_threatened_p2_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
1.60 |
0.00 |
0.26 |
1.09 |
1.43 |
1.60 |
1.77 |
2.11 |
3227.17 |
1.00 |
| Famly age |
0.90 |
0.00 |
0.21 |
0.47 |
0.75 |
0.90 |
1.04 |
1.31 |
3598.23 |
1.00 |
| Range size |
-0.72 |
0.00 |
0.23 |
-1.17 |
-0.88 |
-0.72 |
-0.57 |
-0.28 |
4081.70 |
1.00 |
| Hybrids |
-0.22 |
0.00 |
0.12 |
-0.46 |
-0.30 |
-0.22 |
-0.14 |
0.01 |
4785.79 |
1.00 |
| Animal pollinated |
0.66 |
0.00 |
0.21 |
0.26 |
0.52 |
0.66 |
0.80 |
1.06 |
4431.56 |
1.00 |
| Fleshy fruit |
-0.09 |
0.00 |
0.19 |
-0.46 |
-0.22 |
-0.09 |
0.04 |
0.29 |
4559.50 |
1.00 |
| Climate sum |
-0.04 |
0.00 |
0.08 |
-0.21 |
-0.10 |
-0.04 |
0.01 |
0.12 |
4616.43 |
1.00 |
| Herbaceous |
-0.03 |
0.00 |
0.17 |
-0.37 |
-0.14 |
-0.02 |
0.09 |
0.31 |
4648.53 |
1.00 |
| Annual |
0.06 |
0.00 |
0.16 |
-0.26 |
-0.05 |
0.05 |
0.17 |
0.38 |
4984.73 |
1.00 |
| Axsexual |
-0.11 |
0.00 |
0.14 |
-0.39 |
-0.20 |
-0.11 |
-0.01 |
0.18 |
4559.65 |
1.00 |
| SD brownian effects |
0.50 |
0.02 |
0.24 |
0.04 |
0.31 |
0.52 |
0.68 |
0.94 |
225.51 |
1.01 |
| SD family specific effects |
0.63 |
0.01 |
0.16 |
0.23 |
0.55 |
0.65 |
0.74 |
0.85 |
261.73 |
1.01 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_p2.tex")
##Posterior predictive checks
# first make posterior predictions
pp_threatened <- brms::posterior_predict(vet_threatened_p2)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened_p2$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=vet_threatened_p2$data$threatened, type="iq"))
mean(nrmse_vet)
## [1] 0.3111226
sd(nrmse_vet)
## [1] 0.03603959
# RMSE
rmse_vet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=vet_threatened_p2$data$threatened))
mean(rmse_vet)
## [1] 6.533531
sd(rmse_vet)
## [1] 0.7569331
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_p2, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.4 0.27 0 0.94 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)
Full threat model (IUCN vetted species) - custom prior / posterior plots
# Plotting influence of prior on posterior for a beta parameter
# on the log-odds scale
tibble(x = seq(from = -10, to = 10, by = .01)) %>%
ggplot() +
# the prior
# improper uniform prior restricted to (-10,10) to aid visualization
geom_ribbon(aes(x = x, ymin = 0, ymax = dunif(x,-10,10)),
fill = "#A9A9A9", alpha = 1/2) +
# the posterior
geom_density(data = posterior_samples(vet_threatened_p2),
aes(x = b_age_scaled),
fill = "#E69F00", size = 0, alpha = 1/2) +
scale_y_continuous() +
coord_cartesian(xlim = c(-5, 5)) +
labs(x="Beta Age (brms priors)", y="Density") +
theme_bw() -> prior_post_4_beta
prior_post_4_beta

# Plotting influence of prior on posterior for a sigma parameter
# SD Family Effect
tibble(x = seq(from = 0, to = 6, by = .01)) %>%
ggplot() +
# the prior
geom_ribbon(aes(x = x, ymin = 0, ymax = dstudent_t(x, df=3, mu=0, sigma=10)),
fill = "#A9A9A9", alpha = 1/2) +
# the posterior
geom_density(data = posterior_samples(vet_threatened_p2),
aes(x = sd_family_name__Intercept),
fill = "#E69F00", size = 0, alpha = 1/2) +
scale_y_continuous() +
coord_cartesian(xlim = c(0, 5)) +
labs(x="SD Family Effects (brms priors)", y="Density") +
theme_bw() -> prior_post_4_sigma
prior_post_4_sigma

Naturalized model
if (!file.exists("./fit_nat_10k.rds")) {
fit_nat <- brm(naturalized | trials(species) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name),
data=data, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.95, max_treedepth=11),cores=4)
saveRDS(fit_nat, "./fit_nat_10k.rds")
} else { fit_nat <- readRDS("./fit_nat_10k.rds")}
outcome <- summary(fit_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")
fit_nat_plot <- stanplot(fit_nat, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
fit_nat_plot

pdf("./plots_tables/fit_nat_plot.pdf", width=5, height=4)
fit_nat_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
-2.34 |
0.00 |
0.28 |
-2.89 |
-2.53 |
-2.34 |
-2.16 |
-1.81 |
4678.85 |
1 |
| Famly age |
-1.51 |
0.00 |
0.25 |
-2.00 |
-1.69 |
-1.51 |
-1.35 |
-1.04 |
5007.11 |
1 |
| Range size |
1.61 |
0.00 |
0.27 |
1.10 |
1.43 |
1.61 |
1.79 |
2.15 |
4489.47 |
1 |
| Hybrids |
0.27 |
0.00 |
0.13 |
0.02 |
0.18 |
0.27 |
0.35 |
0.52 |
5005.29 |
1 |
| Animal pollinated |
-0.41 |
0.00 |
0.27 |
-0.94 |
-0.59 |
-0.41 |
-0.23 |
0.12 |
4966.08 |
1 |
| Fleshy fruit |
-0.07 |
0.00 |
0.24 |
-0.55 |
-0.23 |
-0.06 |
0.09 |
0.39 |
4740.17 |
1 |
| Climate sum |
0.18 |
0.00 |
0.10 |
-0.03 |
0.11 |
0.17 |
0.25 |
0.38 |
4295.60 |
1 |
| Herbaceous |
0.65 |
0.00 |
0.22 |
0.23 |
0.50 |
0.65 |
0.80 |
1.09 |
5005.66 |
1 |
| Annual |
0.08 |
0.00 |
0.20 |
-0.31 |
-0.05 |
0.08 |
0.21 |
0.47 |
4787.92 |
1 |
| Axsexual |
0.36 |
0.00 |
0.18 |
0.01 |
0.24 |
0.36 |
0.48 |
0.71 |
5067.27 |
1 |
| SD brownian effects |
1.19 |
0.01 |
0.18 |
0.79 |
1.07 |
1.21 |
1.32 |
1.49 |
895.76 |
1 |
| SD family specific effects |
0.51 |
0.01 |
0.23 |
0.05 |
0.36 |
0.53 |
0.68 |
0.91 |
620.61 |
1 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/fit_nat.tex")
##Posterior predictive checks
pp_nat <- brms::posterior_predict(fit_nat)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=fit_nat$data$naturalized, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nat <- apply(pp_nat, 1, function(x) nrmse(pred=x,obs=fit_nat$data$naturalized, type="iq"))
mean(nrmse_nat)
## [1] 0.511576
sd(nrmse_nat)
## [1] 0.06611309
# RMSE
rmse_nat <- apply(pp_nat, 1, function(x) rmse(pred=x,obs=fit_nat$data$naturalized))
mean(rmse_nat)
## [1] 7.162065
sd(rmse_nat)
## [1] 0.9255925
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.81 0.15 0.45 1 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# simplified threat model
outcome <- summary(vet_threatened_nat$fit, prob=c(0.025,0.10, 0.90,0.975))
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
to_print <- data.frame(to_print)
names1 <- c("Number of naturalized species","Diversification rate","Family age","Range size",paste("\u03C3", "Brownian effects", sep=" "),paste("\u03C3", "family specific effects", sep=" "))
to_print$variable <- as.factor(names1)
p <- ggplot(to_print, aes(x=variable, y=mean)) + scale_x_discrete(limits = rev(levels(to_print$variable))) +
geom_pointrange(aes(ymin=X2.5., ymax=X97.5.), colour="darkgrey", linetype="dotted") +
geom_pointrange(aes(ymin=X10., ymax=X90.), colour="black") +
coord_flip() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
geom_hline(yintercept = 0) + xlab("") + ylab("") +
theme(legend.title = element_blank())
p
ggsave("./plots_tables/threat_nat_forest.pdf", p , height=3,width=5)
ggsave("./plots_tables/threat_nat_forest.png", p , height=3,width=5)
# combined Threat and Naturalized
threat_out <- summary(vet_threatened$fit, prob=c(0.025,0.10, 0.90,0.975))
threat_out <- threat_out$summary[grep("^[sd,b]_*",rownames(threat_out$summary)),]
threat_out <- threat_out[-1,]
threat_out <- data.frame(threat_out)
threat_out$group <- rev(letters[1:nrow(threat_out)])
threat_out$model <- "Threatened"
nat_out <- summary(fit_nat$fit, prob=c(0.025,0.10, 0.90,0.975))
nat_out <- nat_out$summary[grep("^[sd,b]_*",rownames(nat_out$summary)),]
nat_out <- nat_out[-1,]
nat_out <- data.frame(nat_out)
nat_out$group <- rev(letters[1:nrow(nat_out)])
nat_out$model <- "Naturalized"
labels=c("Diversification rate","Family age", "Range size", "Hybrids", "Animal pollination", "Fleshy fruits", "Climate range variability", "Herbaceous","Annual","Asexual seed production", paste("\u03C3", "Brownian effects", sep=" "),paste("\u03C3", "family specific effects", sep=" "))
threat_out$variable <- labels
nat_out$variable <- labels
coefs <- rbind(threat_out, nat_out)
coefs$overlap <- ((coefs$X2.5. > 0) == (coefs$X97.5. > 0))
coefs$overlap[coefs$overlap==TRUE] <- "n"
coefs$overlap[coefs$overlap==FALSE] <- "y"
top <- coefs$mean[1:(length(coefs$mean)/2)]
bottom <- coefs$mean[((length(coefs$mean)/2)+1):length(coefs$mean)]
coefs$opposite <- ((top > 0) == (bottom > 0))
coefs$opposite[coefs$opposite==TRUE] <- "n"
coefs$opposite[coefs$opposite==FALSE] <- "y"
coefs$both <- paste0(coefs$overlap,coefs$opposite)
labels<-rev(labels)
cols<-c("#88b7c5","#a36666")
p2 <- ggplot(coefs, aes(x=group, y=mean, color=model)) + scale_shape_manual(values=c(17,15,17,15)) + scale_alpha_manual(values=c(1,1,0.4,0.4)) +
geom_pointrange(aes(ymin=X2.5., ymax=X97.5.,shape=both,alpha=both),fatten = 0,linetype="dotted") +
geom_pointrange(aes(ymin=X10., ymax=X90.,shape=both,alpha=both), size=0.75) +
coord_flip() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black")) +
geom_hline(yintercept = 0) + xlab("") + ylab("") + theme(legend.position = "none") +
theme(legend.title = element_blank()) + scale_x_discrete(labels=labels) + guides(shape = FALSE, size = FALSE) + scale_colour_manual(values=cols)
p2
ggsave("./plots_tables/combined_forest.pdf", p2 , height=5,width=6)
ggsave("./plots_tables/combined_forest.png", p2 , height=5,width=6)
Alternative Family Effects Structures
Full threat model: no family level effects
if (!file.exists("./vet_threatened_nofam_10k.rds")) {
vet_threatened_nofam <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual,
data=data_vetted, family=binomial(),
iter=10000, thin=4,
control=list(adapt_delta=0.95,max_treedepth=13),cores=4)
saveRDS(vet_threatened_nofam, "./vet_threatened_nofam_10k.rds")
} else { vet_threatened_nofam <- readRDS("./vet_threatened_nofam_10k.rds")}
outcome <- summary(vet_threatened_nofam$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual")
vet_threatened_nofam_plot <- stanplot(vet_threatened_nofam, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nofam_plot

pdf("./plots_tables/vet_threatened_nofam_plot.pdf", width=5, height=4)
vet_threatened_nofam_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
1.08 |
0 |
0.07 |
0.94 |
1.03 |
1.08 |
1.12 |
1.21 |
4776.17 |
1 |
| Famly age |
1.02 |
0 |
0.06 |
0.90 |
0.97 |
1.02 |
1.06 |
1.14 |
4783.33 |
1 |
| Range size |
-0.21 |
0 |
0.06 |
-0.33 |
-0.25 |
-0.21 |
-0.16 |
-0.08 |
4901.92 |
1 |
| Hybrids |
-0.19 |
0 |
0.04 |
-0.27 |
-0.21 |
-0.19 |
-0.16 |
-0.10 |
5002.25 |
1 |
| Animal pollinated |
0.91 |
0 |
0.05 |
0.82 |
0.88 |
0.91 |
0.94 |
1.00 |
4504.78 |
1 |
| Fleshy fruit |
-0.50 |
0 |
0.06 |
-0.62 |
-0.54 |
-0.50 |
-0.46 |
-0.38 |
4748.86 |
1 |
| Climate sum |
-0.17 |
0 |
0.02 |
-0.22 |
-0.19 |
-0.17 |
-0.16 |
-0.13 |
5093.39 |
1 |
| Herbaceous |
0.02 |
0 |
0.03 |
-0.05 |
-0.01 |
0.02 |
0.04 |
0.09 |
4940.66 |
1 |
| Annual |
0.05 |
0 |
0.03 |
-0.02 |
0.03 |
0.05 |
0.08 |
0.12 |
4901.13 |
1 |
| Axsexual |
-0.26 |
0 |
0.03 |
-0.31 |
-0.27 |
-0.26 |
-0.24 |
-0.20 |
5115.10 |
1 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nofam.tex")
##Posterior predictive checks
# first make posterior predictions
pp_threatened_nofam <- brms::posterior_predict(vet_threatened_nofam)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened_nofam$data$threatened, pp_threatened_nofam[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_threatened_nofam <- apply(pp_threatened_nofam, 1, function(x) nrmse(pred=x,obs=vet_threatened_nofam$data$threatened, type="iq"))
mean(nrmse_threatened_nofam)
## [1] 1.140331
sd(nrmse_threatened_nofam)
## [1] 0.05214951
# RMSE
rmse_threatened_nofam <- apply(pp_threatened_nofam, 1, function(x) rmse(pred=x,obs=vet_threatened_nofam$data$threatened))
mean(rmse_threatened_nofam)
## [1] 23.9469
sd(rmse_threatened_nofam)
## [1] 1.095118
Full threat model: only non-brownian family effects
if (!file.exists("./vet_threatened_nophylo_10k.rds")) {
vet_threatened_nophylo <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family_name),
data=data_vetted, family=binomial(),
iter=10000, thin=4,
control=list(adapt_delta=0.95,max_treedepth=13),cores=4)
saveRDS(vet_threatened_nophylo, "./vet_threatened_nophylo_10k.rds")
} else { vet_threatened_nophylo <- readRDS("./vet_threatened_nophylo_10k.rds")}
outcome <- summary(vet_threatened_nophylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects")
vet_threatened_nophylo_plot <- stanplot(vet_threatened_nophylo, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_nophylo_plot

pdf("./plots_tables/vet_threatened_nophylo_plot.pdf", width=5, height=4)
vet_threatened_nophylo_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
1.74 |
0 |
0.26 |
1.24 |
1.57 |
1.73 |
1.92 |
2.27 |
3807.45 |
1 |
| Famly age |
1.01 |
0 |
0.21 |
0.60 |
0.87 |
1.01 |
1.15 |
1.43 |
3957.77 |
1 |
| Range size |
-0.82 |
0 |
0.23 |
-1.26 |
-0.97 |
-0.81 |
-0.66 |
-0.37 |
3648.97 |
1 |
| Hybrids |
-0.22 |
0 |
0.12 |
-0.46 |
-0.30 |
-0.21 |
-0.13 |
0.03 |
4253.90 |
1 |
| Animal pollinated |
0.71 |
0 |
0.20 |
0.32 |
0.57 |
0.71 |
0.84 |
1.09 |
4502.85 |
1 |
| Fleshy fruit |
-0.10 |
0 |
0.19 |
-0.48 |
-0.22 |
-0.10 |
0.03 |
0.27 |
4816.39 |
1 |
| Climate sum |
-0.05 |
0 |
0.08 |
-0.21 |
-0.11 |
-0.05 |
0.00 |
0.10 |
4231.89 |
1 |
| Herbaceous |
-0.05 |
0 |
0.16 |
-0.36 |
-0.15 |
-0.04 |
0.06 |
0.27 |
4199.60 |
1 |
| Annual |
0.06 |
0 |
0.17 |
-0.26 |
-0.06 |
0.06 |
0.17 |
0.39 |
3215.16 |
1 |
| Axsexual |
-0.10 |
0 |
0.14 |
-0.37 |
-0.20 |
-0.11 |
-0.01 |
0.18 |
3430.90 |
1 |
| SD brownian effects |
0.79 |
0 |
0.06 |
0.69 |
0.76 |
0.79 |
0.83 |
0.91 |
3732.53 |
1 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_nophylo.tex")
##Posterior predictive checks
pp_threatened_nophylo <- brms::posterior_predict(vet_threatened_nophylo)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened_nophylo$data$threatened, pp_threatened_nophylo[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_threatened_nophylo <- apply(pp_threatened_nophylo, 1, function(x) nrmse(pred=x,obs=vet_threatened_nophylo$data$threatened, type="iq"))
mean(nrmse_threatened_nophylo)
## [1] 0.3115518
sd(nrmse_threatened_nophylo)
## [1] 0.03584301
# RMSE
rmse_threatened_nophylo <- apply(pp_threatened_nophylo, 1, function(x) rmse(pred=x,obs=vet_threatened_nophylo$data$threatened))
mean(rmse_threatened_nophylo)
## [1] 6.54262
sd(rmse_threatened_nophylo)
## [1] 0.7526886
Full threat model: only brownian family effects
if (!file.exists("./vet_threatened_phylo_10k.rds")) {
vet_threatened_phylo <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual+ (1|family),
data=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.95,max_treedepth=13),cores=4)
saveRDS(vet_threatened_phylo, "./vet_threatened_phylo_10k.rds")
} else { vet_threatened_phylo <- readRDS("./vet_threatened_phylo_10k.rds")}
outcome <- summary(vet_threatened_phylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects")
vet_threatened_phylo_plot <- stanplot(vet_threatened_phylo, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_phylo_plot

pdf("./plots_tables/vet_threatened_phylo_plot.pdf", width=5, height=4)
vet_threatened_phylo_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
1.51 |
0 |
0.25 |
1.03 |
1.35 |
1.51 |
1.67 |
2.02 |
3652.24 |
1 |
| Famly age |
0.86 |
0 |
0.23 |
0.41 |
0.71 |
0.85 |
1.01 |
1.30 |
3636.21 |
1 |
| Range size |
-0.66 |
0 |
0.22 |
-1.11 |
-0.81 |
-0.66 |
-0.51 |
-0.23 |
4159.18 |
1 |
| Hybrids |
-0.22 |
0 |
0.12 |
-0.45 |
-0.30 |
-0.22 |
-0.14 |
0.00 |
4655.58 |
1 |
| Animal pollinated |
0.62 |
0 |
0.21 |
0.20 |
0.47 |
0.61 |
0.76 |
1.05 |
4121.14 |
1 |
| Fleshy fruit |
-0.11 |
0 |
0.19 |
-0.50 |
-0.24 |
-0.11 |
0.02 |
0.26 |
4023.19 |
1 |
| Climate sum |
-0.04 |
0 |
0.08 |
-0.21 |
-0.10 |
-0.04 |
0.01 |
0.12 |
4214.25 |
1 |
| Herbaceous |
-0.02 |
0 |
0.18 |
-0.37 |
-0.15 |
-0.02 |
0.10 |
0.34 |
3794.82 |
1 |
| Annual |
0.08 |
0 |
0.16 |
-0.24 |
-0.03 |
0.08 |
0.19 |
0.41 |
3148.23 |
1 |
| Axsexual |
-0.13 |
0 |
0.14 |
-0.40 |
-0.22 |
-0.13 |
-0.03 |
0.14 |
3646.58 |
1 |
| SD brownian effects |
0.98 |
0 |
0.07 |
0.85 |
0.93 |
0.98 |
1.02 |
1.12 |
3759.27 |
1 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_phylo.tex")
##Posterior predictive checks
pp_threatened_phylo <- brms::posterior_predict(vet_threatened_phylo)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened_phylo$data$threatened, pp_threatened_phylo[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_threatened_phylo <- apply(pp_threatened_phylo, 1, function(x) nrmse(pred=x,obs=vet_threatened_phylo$data$threatened, type="iq"))
mean(nrmse_threatened_phylo)
## [1] 0.3114918
sd(nrmse_threatened_phylo)
## [1] 0.03569686
# RMSE
rmse_threatened_phylo <- apply(pp_threatened_phylo, 1, function(x) rmse(pred=x,obs=vet_threatened_phylo$data$threatened))
mean(rmse_threatened_phylo)
## [1] 6.541305
sd(rmse_threatened_phylo)
## [1] 0.7495942
Full threat model (total species richness - not vetted by IUCN)
if (!file.exists("./nonvet_threatened_10k.rds")) {
nonvet_threatened <- brm(threatened | trials(species) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name),
data=data, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.99,max_treedepth=13),cores=4)
saveRDS(nonvet_threatened, "./nonvet_threatened_10k.rds")
} else { nonvet_threatened <- readRDS("./nonvet_threatened_10k.rds")}
outcome <- summary(nonvet_threatened$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")
nonvet_threatened_plot <- stanplot(nonvet_threatened, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
nonvet_threatened_plot

pdf("./plots_tables/nonvet_threatened_plot.pdf", width=5, height=4)
nonvet_threatened_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
0.71 |
0.00 |
0.32 |
0.11 |
0.50 |
0.70 |
0.92 |
1.34 |
4042.20 |
1.00 |
| Famly age |
0.48 |
0.00 |
0.25 |
0.00 |
0.31 |
0.48 |
0.65 |
0.97 |
4323.14 |
1.00 |
| Range size |
-0.33 |
0.00 |
0.28 |
-0.87 |
-0.52 |
-0.33 |
-0.14 |
0.23 |
4493.81 |
1.00 |
| Hybrids |
-0.04 |
0.00 |
0.15 |
-0.33 |
-0.14 |
-0.04 |
0.06 |
0.25 |
4416.26 |
1.00 |
| Animal pollinated |
0.15 |
0.00 |
0.26 |
-0.37 |
-0.02 |
0.15 |
0.32 |
0.65 |
4565.73 |
1.00 |
| Fleshy fruit |
-0.07 |
0.00 |
0.23 |
-0.51 |
-0.23 |
-0.07 |
0.08 |
0.37 |
4374.02 |
1.00 |
| Climate sum |
-0.27 |
0.00 |
0.10 |
-0.47 |
-0.34 |
-0.27 |
-0.20 |
-0.07 |
4492.38 |
1.00 |
| Herbaceous |
-0.80 |
0.00 |
0.21 |
-1.22 |
-0.95 |
-0.81 |
-0.66 |
-0.38 |
3410.76 |
1.00 |
| Annual |
-0.41 |
0.00 |
0.21 |
-0.82 |
-0.55 |
-0.41 |
-0.27 |
-0.01 |
4601.07 |
1.00 |
| Axsexual |
0.23 |
0.00 |
0.18 |
-0.13 |
0.10 |
0.23 |
0.35 |
0.59 |
4197.47 |
1.00 |
| SD brownian effects |
0.49 |
0.01 |
0.25 |
0.04 |
0.30 |
0.50 |
0.67 |
0.97 |
373.84 |
1.02 |
| SD family specific effects |
0.99 |
0.00 |
0.11 |
0.74 |
0.92 |
1.00 |
1.07 |
1.19 |
689.53 |
1.01 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/nonvet_threatened.tex")
##Posterior predictive checks
pp_threatened <- brms::posterior_predict(nonvet_threatened)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=nonvet_threatened$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nonvet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=nonvet_threatened$data$threatened, type="iq"))
mean(nrmse_nonvet)
## [1] 0.4715996
sd(nrmse_nonvet)
## [1] 0.05317477
# RMSE
rmse_nonvet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=nonvet_threatened$data$threatened))
mean(rmse_nonvet)
## [1] 7.54555
sd(rmse_nonvet)
## [1] 0.8506925
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(nonvet_threatened, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.23 0.17 0 0.62 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
Full models with MOM diversification estimator
Full threat model (IUCN vetted species) - MOM
if (!file.exists("./vet_threatened_MOM_10k.rds")) {
vet_threatened_MOM <- brm(threatened | trials(vetted) ~ diversification_mom_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name),
data=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.98,max_treedepth=13),cores=4)
# no divergent transitions
saveRDS(vet_threatened_MOM, "./vet_threatened_MOM_10k.rds")
} else { vet_threatened_MOM <- readRDS("./vet_threatened_MOM_10k.rds")}
# prior_summary(vet_threatened_MOM, all=FALSE)
outcome <- summary(vet_threatened_MOM$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate (MOM)","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")
vet_threatened_MOM_plot <- stanplot(vet_threatened_MOM, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_MOM_plot

pdf("./plots_tables/vet_threatened_MOM_plot.pdf", width=5, height=4)
vet_threatened_MOM_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate (MOM) |
1.40 |
0.00 |
0.22 |
0.97 |
1.25 |
1.40 |
1.55 |
1.85 |
4187.89 |
1.00 |
| Famly age |
0.65 |
0.00 |
0.19 |
0.27 |
0.52 |
0.65 |
0.78 |
1.02 |
4370.33 |
1.00 |
| Range size |
-0.75 |
0.00 |
0.23 |
-1.20 |
-0.91 |
-0.74 |
-0.58 |
-0.29 |
5028.09 |
1.00 |
| Hybrids |
-0.21 |
0.00 |
0.12 |
-0.45 |
-0.29 |
-0.21 |
-0.13 |
0.03 |
4951.56 |
1.00 |
| Animal pollinated |
0.70 |
0.00 |
0.21 |
0.30 |
0.56 |
0.70 |
0.85 |
1.11 |
4724.15 |
1.00 |
| Fleshy fruit |
-0.13 |
0.00 |
0.19 |
-0.51 |
-0.26 |
-0.13 |
0.00 |
0.26 |
4792.09 |
1.00 |
| Climate sum |
-0.05 |
0.00 |
0.08 |
-0.22 |
-0.11 |
-0.05 |
0.01 |
0.11 |
5133.96 |
1.00 |
| Herbaceous |
-0.02 |
0.00 |
0.18 |
-0.37 |
-0.14 |
-0.02 |
0.09 |
0.33 |
5114.28 |
1.00 |
| Annual |
0.05 |
0.00 |
0.17 |
-0.27 |
-0.06 |
0.05 |
0.16 |
0.39 |
4683.03 |
1.00 |
| Axsexual |
-0.14 |
0.00 |
0.14 |
-0.42 |
-0.23 |
-0.14 |
-0.04 |
0.14 |
4964.23 |
1.00 |
| SD brownian effects |
0.52 |
0.01 |
0.24 |
0.05 |
0.35 |
0.53 |
0.70 |
0.96 |
519.18 |
1.01 |
| SD family specific effects |
0.62 |
0.01 |
0.16 |
0.20 |
0.54 |
0.65 |
0.74 |
0.85 |
577.42 |
1.00 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_MOM.tex")
##Posterior predictive checks
# first make posterior predictions
pp_threatened <- brms::posterior_predict(vet_threatened_MOM)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened_MOM$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=vet_threatened_MOM$data$threatened, type="iq"))
mean(nrmse_vet)
## [1] 0.3100034
sd(nrmse_vet)
## [1] 0.03550366
# RMSE
rmse_vet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=vet_threatened_MOM$data$threatened))
mean(rmse_vet)
## [1] 6.509996
sd(rmse_vet)
## [1] 0.7455832
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_MOM, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.42 0.27 0 0.96 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)
Naturalized model - MOM
if (!file.exists("./fit_nat_MOM_10k.rds")) {
fit_nat_MOM <- brm(naturalized | trials(species) ~ diversification_mom_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + (1|family) + (1|family_name),
data=data, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.95, max_treedepth=11),cores=4)
saveRDS(fit_nat_MOM, "./fit_nat_MOM_10k.rds")
} else { fit_nat_MOM <- readRDS("./fit_nat_MOM_10k.rds")}
outcome <- summary(fit_nat_MOM$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate (MOM)","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","SD brownian effects","SD family specific effects")
fit_nat_MOM_plot <- stanplot(fit_nat_MOM, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
fit_nat_MOM_plot

pdf("./plots_tables/fit_nat_MOM_plot.pdf", width=5, height=4)
fit_nat_MOM_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate (MOM) |
-2.00 |
0.00 |
0.25 |
-2.49 |
-2.17 |
-2.00 |
-1.83 |
-1.51 |
4574.00 |
1 |
| Famly age |
-1.12 |
0.00 |
0.23 |
-1.56 |
-1.27 |
-1.11 |
-0.96 |
-0.68 |
4865.63 |
1 |
| Range size |
1.58 |
0.00 |
0.27 |
1.05 |
1.40 |
1.57 |
1.76 |
2.13 |
4432.73 |
1 |
| Hybrids |
0.25 |
0.00 |
0.13 |
-0.01 |
0.16 |
0.25 |
0.33 |
0.50 |
4760.78 |
1 |
| Animal pollinated |
-0.47 |
0.00 |
0.28 |
-1.02 |
-0.65 |
-0.46 |
-0.28 |
0.07 |
4641.26 |
1 |
| Fleshy fruit |
-0.03 |
0.00 |
0.24 |
-0.51 |
-0.19 |
-0.03 |
0.13 |
0.44 |
4890.97 |
1 |
| Climate sum |
0.18 |
0.00 |
0.10 |
-0.02 |
0.11 |
0.18 |
0.24 |
0.38 |
5019.20 |
1 |
| Herbaceous |
0.66 |
0.00 |
0.22 |
0.22 |
0.52 |
0.66 |
0.81 |
1.10 |
4700.80 |
1 |
| Annual |
0.11 |
0.00 |
0.20 |
-0.27 |
-0.02 |
0.11 |
0.24 |
0.50 |
4658.60 |
1 |
| Axsexual |
0.41 |
0.00 |
0.19 |
0.05 |
0.29 |
0.41 |
0.53 |
0.77 |
4937.08 |
1 |
| SD brownian effects |
1.21 |
0.01 |
0.18 |
0.83 |
1.09 |
1.22 |
1.33 |
1.51 |
1092.51 |
1 |
| SD family specific effects |
0.51 |
0.01 |
0.22 |
0.04 |
0.36 |
0.54 |
0.68 |
0.89 |
653.70 |
1 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/fit_nat_MOM.tex")
##Posterior predictive checks
pp_nat <- brms::posterior_predict(fit_nat_MOM)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=fit_nat_MOM$data$naturalized, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nat <- apply(pp_nat, 1, function(x) nrmse(pred=x,obs=fit_nat_MOM$data$naturalized, type="iq"))
mean(nrmse_nat)
## [1] 0.5091612
sd(nrmse_nat)
## [1] 0.0639227
# RMSE
rmse_nat <- apply(pp_nat, 1, function(x) rmse(pred=x,obs=fit_nat_MOM$data$naturalized))
mean(rmse_nat)
## [1] 7.128302
sd(rmse_nat)
## [1] 0.894913
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat_MOM, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.81 0.14 0.49 1 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
Full models with number of horticultural species
Full threat model (IUCN vetted species) - Horticultural use
if (!file.exists("./vet_threatened_hort_10k.rds")) {
vet_threatened_hort <- brm(threatened | trials(vetted) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + hort.species + (1|family) + (1|family_name),
data=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.98,max_treedepth=13),cores=4)
# no divergent transitions
saveRDS(vet_threatened_hort, "./vet_threatened_hort_10k.rds")
} else { vet_threatened_hort <- readRDS("./vet_threatened_hort_10k.rds")}
# prior_summary(vet_threatened_hort, all=FALSE)
outcome <- summary(vet_threatened_hort$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual","Horticultural species", "SD brownian effects","SD family specific effects")
vet_threatened_hort_plot <- stanplot(vet_threatened_hort, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion threatened species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
vet_threatened_hort_plot

pdf("./plots_tables/vet_threatened_hort_plot.pdf", width=5, height=4)
vet_threatened_hort_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
1.72 |
0.00 |
0.29 |
1.16 |
1.53 |
1.72 |
1.92 |
2.29 |
3387.75 |
1.00 |
| Famly age |
0.96 |
0.00 |
0.23 |
0.52 |
0.80 |
0.96 |
1.11 |
1.40 |
3284.76 |
1.00 |
| Range size |
-0.66 |
0.00 |
0.24 |
-1.11 |
-0.82 |
-0.66 |
-0.49 |
-0.19 |
4487.16 |
1.00 |
| Hybrids |
-0.21 |
0.00 |
0.12 |
-0.44 |
-0.29 |
-0.21 |
-0.13 |
0.02 |
4580.59 |
1.00 |
| Animal pollinated |
0.64 |
0.00 |
0.21 |
0.24 |
0.50 |
0.64 |
0.78 |
1.06 |
4008.94 |
1.00 |
| Fleshy fruit |
-0.08 |
0.00 |
0.19 |
-0.47 |
-0.21 |
-0.08 |
0.04 |
0.29 |
4554.09 |
1.00 |
| Climate sum |
-0.03 |
0.00 |
0.08 |
-0.20 |
-0.09 |
-0.03 |
0.03 |
0.13 |
4676.35 |
1.00 |
| Herbaceous |
-0.02 |
0.00 |
0.18 |
-0.37 |
-0.14 |
-0.02 |
0.09 |
0.33 |
4747.66 |
1.00 |
| Annual |
0.09 |
0.00 |
0.17 |
-0.23 |
-0.02 |
0.09 |
0.20 |
0.42 |
4483.79 |
1.00 |
| Axsexual |
-0.08 |
0.00 |
0.14 |
-0.37 |
-0.18 |
-0.08 |
0.01 |
0.20 |
4618.21 |
1.00 |
| Horticultural species |
-0.21 |
0.00 |
0.23 |
-0.66 |
-0.36 |
-0.21 |
-0.05 |
0.24 |
4067.48 |
1.00 |
| SD brownian effects |
0.46 |
0.01 |
0.25 |
0.03 |
0.27 |
0.47 |
0.65 |
0.92 |
313.14 |
1.01 |
| SD family specific effects |
0.64 |
0.01 |
0.15 |
0.25 |
0.57 |
0.67 |
0.75 |
0.86 |
359.42 |
1.01 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_threatened_hort.tex")
##Posterior predictive checks
# first make posterior predictions
pp_threatened <- brms::posterior_predict(vet_threatened_hort)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_threatened_hort$data$threatened, pp_threatened[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_threatened, 1, function(x) nrmse(pred=x,obs=vet_threatened_hort$data$threatened, type="iq"))
mean(nrmse_vet)
## [1] 0.3113806
sd(nrmse_vet)
## [1] 0.03590083
# RMSE
rmse_vet <- apply(pp_threatened, 1, function(x) rmse(pred=x,obs=vet_threatened_hort$data$threatened))
mean(rmse_vet)
## [1] 6.538981
sd(rmse_vet)
## [1] 0.7538275
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_threatened_hort, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.36 0.27 0 0.93 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)
Naturalized model - horticultural species
if (!file.exists("./fit_nat_hort_10k.rds")) {
fit_nat_hort <- brm(naturalized | trials(species) ~ diversification_scaled + age_scaled + range.size + hybrids + animal + fleshy.fruit + climate.sum + herbaceous + annual + asexual + hort.species + (1|family) + (1|family_name),
data=data, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.9, max_treedepth=11),cores=4)
saveRDS(fit_nat_hort, "./fit_nat_hort_10k.rds")
} else { fit_nat_hort <- readRDS("./fit_nat_hort_10k.rds")}
outcome <- summary(fit_nat_hort$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Range size", "Hybrids","Animal pollinated", "Fleshy fruit", "Climate sum","Herbaceous","Annual","Axsexual", "Horticultural species", "SD brownian effects","SD family specific effects")
fit_nat_hort_plot <- stanplot(fit_nat_hort, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Warning: Method 'stanplot' is deprecated. Please use 'mcmc_plot' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
fit_nat_hort_plot

pdf("./plots_tables/fit_nat_hort_plot.pdf", width=5, height=4)
fit_nat_hort_plot + ggtitle("")
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
-4.00 |
0.00 |
0.25 |
-4.48 |
-4.18 |
-4.01 |
-3.84 |
-3.50 |
3483.62 |
1 |
| Famly age |
-2.31 |
0.00 |
0.19 |
-2.68 |
-2.45 |
-2.32 |
-2.18 |
-1.93 |
4079.04 |
1 |
| Range size |
0.25 |
0.00 |
0.23 |
-0.20 |
0.09 |
0.25 |
0.40 |
0.68 |
4774.36 |
1 |
| Hybrids |
0.19 |
0.00 |
0.10 |
-0.01 |
0.12 |
0.19 |
0.25 |
0.37 |
4804.81 |
1 |
| Animal pollinated |
0.03 |
0.00 |
0.21 |
-0.38 |
-0.10 |
0.03 |
0.17 |
0.44 |
4811.80 |
1 |
| Fleshy fruit |
-0.16 |
0.00 |
0.19 |
-0.54 |
-0.29 |
-0.16 |
-0.03 |
0.21 |
3789.03 |
1 |
| Climate sum |
-0.03 |
0.00 |
0.08 |
-0.19 |
-0.08 |
-0.03 |
0.03 |
0.13 |
4748.73 |
1 |
| Herbaceous |
0.56 |
0.00 |
0.16 |
0.24 |
0.45 |
0.56 |
0.67 |
0.89 |
4646.70 |
1 |
| Annual |
-0.30 |
0.00 |
0.15 |
-0.59 |
-0.41 |
-0.30 |
-0.20 |
-0.02 |
4679.69 |
1 |
| Axsexual |
-0.06 |
0.00 |
0.14 |
-0.33 |
-0.15 |
-0.06 |
0.03 |
0.20 |
4470.14 |
1 |
| Horticultural species |
3.32 |
0.00 |
0.25 |
2.82 |
3.14 |
3.32 |
3.49 |
3.80 |
4728.62 |
1 |
| SD brownian effects |
0.60 |
0.01 |
0.20 |
0.13 |
0.48 |
0.62 |
0.74 |
0.96 |
662.18 |
1 |
| SD family specific effects |
0.55 |
0.01 |
0.15 |
0.16 |
0.48 |
0.57 |
0.65 |
0.78 |
643.33 |
1 |
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/fit_nat_hort.tex")
##Posterior predictive checks
pp_nat <- brms::posterior_predict(fit_nat_hort)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=fit_nat_hort$data$naturalized, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nat <- apply(pp_nat, 1, function(x) nrmse(pred=x,obs=fit_nat_hort$data$naturalized, type="iq"))
mean(nrmse_nat)
## [1] 0.5103294
sd(nrmse_nat)
## [1] 0.0656822
# RMSE
rmse_nat <- apply(pp_nat, 1, function(x) rmse(pred=x,obs=fit_nat_hort$data$naturalized))
mean(rmse_nat)
## [1] 7.144582
sd(rmse_nat)
## [1] 0.9195189
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat_hort, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.52 0.24 0.03 0.97 NA
## Post.Prob Star
## 1 NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.